Emile Cohen
June 2020
Goal: In this notebook, we want to understand what makes Glioma an outlier for the patterns we saw, and what are the major subcohorts that drive the outlying signal.
%run -i '../../../../../utils/setup_environment.ipy'
import pickle
import warnings
warnings.filterwarnings('ignore')
import matplotlib.gridspec as gridspec
from matplotlib.ticker import FuncFormatter
from scipy.stats import fisher_exact, ranksums, chi2, norm
from statsmodels.sandbox.stats.multicomp import multipletests
# Root of the project data directory, relative to this notebook.
data_path = '../../../../../data/'
# Sub-directories holding the processed tables of the WGD and non-WGD cohorts.
data_wgd = data_path + 'impact-facets-tp53/processed/wgd/'
data_no_wgd = data_path + 'impact-facets-tp53/processed/no_wgd/'
# Presumably the IPython automagic that prints the working directory; it is not valid plain Python.
pwd
from functools import reduce
def get_hotspots(df: pd.DataFrame, Sample_Type: str, group: list = None, group_type: str = None):
    """Count how often each TP53 spot appears in the ``tp53_spot_1`` .. ``tp53_spot_5`` columns.

    Args:
        df: Table with a ``Sample_Type`` column and the five ``tp53_spot_*`` columns.
        Sample_Type: Only rows whose ``Sample_Type`` equals this value are kept.
        group: Optional list of values to keep in ``group_type``.
        group_type: Optional column name filtered with ``group``; the filter is
            applied only when both ``group`` and ``group_type`` are given.

    Returns:
        A DataFrame indexed by spot with columns ``count_1`` .. ``count_5`` and
        ``total``, sorted by ``total`` in descending order.
    """
    data = df[df['Sample_Type'] == Sample_Type]
    if group and group_type:
        data = data[data[group_type].isin(group)]

    # Give every per-column count its final name *before* merging: merging five
    # frames that all carry a column called 'count' makes pandas generate the
    # same '_x'/'_y' suffixes twice, which recent pandas versions reject.
    series_data = []
    for position in range(1, 6):
        counts = get_groupby(data, 'tp53_spot_{}'.format(position), 'count')
        counts.columns = ['count_{}'.format(position)]
        series_data.append(counts)

    df_merged = reduce(
        lambda left, right: pd.merge(left, right, left_index=True, right_index=True, how='outer'),
        series_data,
    ).fillna(0)
    df_merged['total'] = df_merged.sum(axis=1)
    df_merged = df_merged.sort_values(by='total', ascending=False)
    # The 'nan' row is removed when present; a cohort without it must not crash.
    df_merged = df_merged.drop('nan', errors='ignore')
    return df_merged
def get_hotspot_frac(df: pd.DataFrame, group_type:str = None, group: list = None, nb = 10):
    """Summarise ``frac_genome_altered`` for the ``nb`` most frequent ``tp53_spot_1`` values.

    When both ``group_type`` and ``group`` are given, ``df`` is first restricted
    to rows whose ``group_type`` value is in ``group``.

    Returns:
        A DataFrame whose first row is the header ``['spot', '#', 'frac']`` and
        whose following rows hold, per spot, the number of samples and the
        median ``frac_genome_altered``.
    """
    if group_type and group:
        df = df[df[group_type].isin(group)]

    ranked_spots = get_groupby(df, 'tp53_spot_1', 'count').sort_values(by='count', ascending=False)
    top_spots = ranked_spots.head(nb).index.tolist()

    def _summary_row(spot):
        # One pass per spot: select its samples once, then derive both figures.
        fga = df[df['tp53_spot_1'] == spot].frac_genome_altered
        return [spot, fga.shape[0], fga.median()]

    rows = [['spot', '#', 'frac']] + [_summary_row(spot) for spot in top_spots]
    return pd.DataFrame(rows)
def boxplot_sampletype(df: pd.DataFrame, group: str, palette, order, metrics: str, figsize=(10, 3), title: str = '', title_font: int = 12, xlim=(0, 1)):
    """Draw one box per category of ``group`` for the ``metrics`` column.

    Each x tick label shows the category name and, on a second line, the
    number of rows of ``df`` in that category.

    Args:
        df: Table holding the ``group`` and ``metrics`` columns.
        group: Column used for the x axis categories.
        palette: Colour mapping passed to ``sns.boxplot``.
        order: Categories to draw, in display order.
        metrics: Column plotted on the y axis.
        figsize: Size of the created figure.
        title: Axes title.
        title_font: Font size of the title.
        xlim: Limits applied to the *y* axis (the metric axis); the parameter
            name is kept for backward compatibility.

    Returns:
        The ``(fig, ax)`` pair that was created.
    """
    fig = plt.figure(figsize=figsize)
    ax = plt.subplot2grid(shape=(2, 1), loc=(0, 0), colspan=1)
    sns.boxplot(y=metrics, x=group, data=df, ax=ax, dodge=False, order=order, palette=palette).set_title(title, weight='bold', fontsize=title_font)

    counts = get_groupby(df, group, 'count')
    labels = []
    for category in order:
        # Categories requested in `order` but absent from `df` are labelled with 0.
        # (Zero-filling used to rely on the global `mutation_list`, so any other
        # grouping with an empty category failed on the lookup below.)
        size = counts.loc[category].values[0] if category in counts.index else 0
        labels.append(category + '\n(' + str(size) + ')')
    ax.set_xticklabels(labels)

    style(ax)
    ax.set_ylim(xlim)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    return fig, ax
# Medians and a two-sample rank-sum test for a pair of groups
def get_statistics(df: pd.DataFrame, group:str, metrics: str, group_list: list):
    """Compare ``metrics`` between the two groups named in ``group_list``.

    Args:
        df: Table holding the ``group`` and ``metrics`` columns.
        group: Column whose values identify the groups.
        metrics: Numeric column to compare.
        group_list: The two group labels to compare (first two items are used).

    Returns:
        A 5x3 DataFrame laid out for display: a header row, one row per group
        with its size (missing values included) and median, then the
        ``ranksums`` statistic and p-value computed on the non-missing values.
    """
    first_label, second_label = group_list[0], group_list[1]
    first_values = df.loc[df[group] == first_label, metrics]
    second_values = df.loc[df[group] == second_label, metrics]

    # Missing values are dropped for the test only; sizes still count them.
    test_result = ranksums(first_values.dropna().values, second_values.dropna().values)

    table = [
        ['', 'size', metrics],
        [first_label, len(first_values), first_values.median()],
        [second_label, len(second_values), second_values.median()],
        ['', 'Statistics', 'p-value'],
        ['', test_result[0], test_result[1]],
    ]
    return pd.DataFrame(table)
def get_major_codrivers(master: pd.DataFrame, maf: pd.DataFrame, head: int = 10, tp53=False):
    """Count driver mutations per gene among the samples of ``master``.

    Args:
        master: Sample table; its ``Tumor_Id`` values select the samples.
        maf: Mutation table with ``Tumor_Sample_Barcode``, ``driver`` and
            ``Hugo_Symbol`` columns.
        head: Number of most frequently mutated genes to return.
        tp53: When False (default) TP53 rows are excluded from the count.

    Returns:
        A DataFrame indexed by ``Hugo_Symbol`` with a single ``count`` column,
        sorted by ``count`` in descending order and truncated to ``head`` rows.
    """
    samples = master.Tumor_Id.tolist()
    # One combined mask on `maf` itself: chaining `maf[a][b][c]` indexes an
    # already-filtered frame with full-length masks and relies on pandas
    # silently re-aligning them.
    keep = maf.Tumor_Sample_Barcode.isin(samples) & (maf['driver'] == True)  # noqa: E712
    if not tp53:
        keep &= maf['Hugo_Symbol'] != 'TP53'
    counts = maf.loc[keep].groupby('Hugo_Symbol').size().to_frame('count')
    return counts.sort_values(by='count', ascending=False).head(head)
def create_co_drivers_table(master: pd.DataFrame, group_type: str, group_1: str, group_2: str):
    """Build a side-by-side co-driver table for two subgroups of ``master``.

    For each subgroup the 100 most frequent co-driver genes are taken from the
    module-level ``maf_cohort_nowgd`` table, and each gene's share (in percent,
    rounded to two decimals) of those counted mutations is computed.

    Args:
        master: Sample table holding the ``group_type`` column.
        group_type: Column used to select the two subgroups.
        group_1: Value of ``group_type`` for the first subgroup.
        group_2: Value of ``group_type`` for the second subgroup.

    Returns:
        The inner merge of both tables on ``Hugo_Symbol``. ``proportion_1`` is
        negated so that the two proportions can be drawn as a mirrored bar chart.
    """
    def _top_codrivers(label, proportion_column):
        # Top co-drivers of one subgroup plus their percentage of the counted mutations.
        subgroup = master[master[group_type] == label]
        table = get_major_codrivers(master=subgroup, maf=maf_cohort_nowgd, head=100)
        # The total is computed once; it used to be re-summed for every row.
        total = table['count'].sum()
        table[proportion_column] = table['count'].apply(lambda n: 100 * round(n / total, 4))
        return table

    co_drivers_group_1 = _top_codrivers(group_1, 'proportion_1')
    co_drivers_group_2 = _top_codrivers(group_2, 'proportion_2')
    co_drivers_groups = pd.merge(co_drivers_group_1, co_drivers_group_2, on='Hugo_Symbol')
    co_drivers_groups['proportion_1'] = - co_drivers_groups['proportion_1']
    return co_drivers_groups
# Cancer type studied in this notebook; used below to filter the cohorts and in plot titles.
cancer='Glioma'
# Non-WGD master table, loaded through a project helper defined outside this file
# (NOTE(review): presumably it also applies a cohort cut, as its name suggests - confirm).
master_no_wgd = non_wgd_load_and_cut(data_path + 'impact-facets-tp53/processed/no_wgd/master_no_wgd.pkl')
# WGD master table, read as-is from its pickle.
master_wgd = pd.read_pickle(data_path + 'impact-facets-tp53/processed/wgd/master_wgd.pkl')
# Restrict both master tables to the selected cancer type.
master_no_wgd_cancer = master_no_wgd[master_no_wgd['Cancer_Type'] == cancer]
master_wgd_cancer = master_wgd[master_wgd['Cancer_Type'] == cancer]
# Tab-separated mutation tables for both cohorts; the 'Unnamed: 0' column is dropped after loading.
maf_cohort_nowgd = pd.read_csv(data_path + 'impact-facets-tp53/processed/no_wgd/maf_cohort_nowgd.txt', sep='\t').drop('Unnamed: 0', axis=1)
maf_cohort_wgd = pd.read_csv(data_path + 'impact-facets-tp53/processed/wgd/maf_cohort_wgd.txt', sep='\t').drop('Unnamed: 0', axis=1)
Glioma is the cancer with the lowest WGD proportion, around 5%:


Glioma is highly enriched in Primary Samples, in >1muts subgroup and in composite mutation samples in general

Glioma is one of the three cancers that show an outlying signal in the Genome Instability pattern: while almost all cancers show a correlation between TP53 bi-allelic state and Genome Instability, Glioma, Pancreatic Cancer and Melanoma do not show this GI difference.

Glioma shows an outlying pattern:
We have already seen that the WGD proportion of Glioma was really low. Here we see that the WGD cohort size is very small in comparison with the Non-WGD Cohort size for Glioma.


In WGD cohort, Genome Instability median is above 70% for all cancer types.

In this section, our goal is to find the subcohorts that drive the signals observed. Here are the different subcohorts we will create:
In this section, we cut our cohort to only keep samples with exactly one TP53 mutation, for simplicity.

# Samples with exactly one TP53 mutation.
master_hotspot = master_no_wgd_cancer[master_no_wgd_cancer['tp53_count'] == 1]
get_hotspot_frac(df=master_hotspot,
                 group_type=None,
                 group=None)
# Number of samples per mutation category of the (single) TP53 mutation.
h = get_groupby(master_hotspot,'tp53_vc_group_1', 'count').sort_values(by='count', ascending=False)
display(h)
h = h.T
# reindex keeps the plotting order of mutation_list and zero-fills categories that
# are absent from this cohort; plain h[mutation_list] raises KeyError in that case.
h = h.reindex(columns=mutation_list, fill_value=0)
fig = plt.figure(figsize=(6,1))
ax = plt.subplot()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
h_plot = h.plot(kind = 'barh', stacked=True, yticks=[], ax=ax, colormap="Accent")
# NOTE(review): the legend labels are hard-coded and assume the order of mutation_list - confirm.
ax.legend(['In Frame', 'Truncated', 'Missense', 'Hotspot 248','Hotspot 273','Hotspot 175', 'Other Hotspot'],loc='center left', bbox_to_anchor=(1.2, 0.5), fontsize=11)
ax.set_title('Mutation Type - {} - No WGD'.format(cancer), weight='bold', fontsize=18)
plt.show()
# Fraction of genome altered per mutation category, for samples with exactly one TP53 mutation.
fig, ax = boxplot_sampletype(df=master_hotspot,
group='tp53_vc_group_1',
palette=mutation_palette,
order=mutation_list,
metrics='frac_genome_altered',
figsize=(6,10),
title='Fraction of Genome Altered - {}'.format(cancer),
xlim=[0,1])
plt.show()
# Sizes of the two tp53_res_group subsets ('no_tp53_res' is reported as bi-allelic).
print('Number of Bi Allelic samples (with 1 mut): ' + str(master_hotspot[master_hotspot['tp53_res_group'] == 'no_tp53_res'].shape[0]))
print('')
print('Number of TP53 Residual samples (with 1 mut): ' + str(master_hotspot[master_hotspot['tp53_res_group'] == 'tp53_res'].shape[0]))
# One stacked bar of mutation categories per tp53_res_group value; the raw
# count tables are collected so they can be displayed side by side afterwards.
total_df = []
for group in ['tp53_res', 'no_tp53_res']:
    h = get_groupby(
        master_hotspot[master_hotspot['tp53_res_group'] == group],
        'tp53_vc_group_1',
        group,
    ).sort_values(by=group, ascending=False)
    total_df.append(h)
    # Categories missing from this subset are shown with a zero count.
    h = h.T.reindex(columns=mutation_list, fill_value=0)
    fig = plt.figure(figsize=(6,1))
    ax = plt.subplot()
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    h_plot = h.plot(kind='barh', stacked=True, yticks=[], ax=ax, colormap="Accent")
    # Only the first bar carries the legend.
    if group != 'tp53_res':
        ax.get_legend().remove()
    else:
        ax.legend(['In Frame', 'Truncated', 'Missense', 'Hotspot 248','Hotspot 273','Hotspot 175', 'Other Hotspot'],
                  loc='center left', bbox_to_anchor=(1.05, 0.5), fontsize=11)
    ax.set_title('Mutation Type - {} - No WGD'.format(group), weight='bold', fontsize=18)
    plt.show()
display_side_by_side(total_df[0], total_df[1])
# Same boxplot as for the whole single-mutation cohort, drawn once per tp53_res_group value.
for group in ('tp53_res', 'no_tp53_res'):
    master_wt = master_hotspot[master_hotspot['tp53_res_group'] == group]
    subgroup_title = 'Fraction of Genome Altered - No WGD - {} subgroup'.format(group)
    fig, ax = boxplot_sampletype(
        df=master_wt,
        group='tp53_vc_group_1',
        palette=mutation_palette,
        order=mutation_list,
        metrics='frac_genome_altered',
        figsize=(6,10),
        title=subgroup_title,
        xlim=[0,1],
    )
    plt.show()
In this section we compare SNV and INDEL mutations. As in the previous section, we cut the cohort to keep only samples with exactly 1 tp53 mutation.
# The three panels (all drivers, SNV drivers, INDEL drivers) differ only in
# the metric column, the title prefix and the upper y limit, so they are
# driven from one table instead of three copy-pasted cells.
# Also fixes the "Subroups" typo in the figure titles.
driver_count_panels = [
    ('driver_mutation_count', 'Driver Mutation Count', 10),
    ('snv_driver_mutation_count', 'SNV Driver Mutation Count', 15),
    ('indel_driver_mutation_count', 'INDEL Driver Mutation Count', 35),
]
# Pairs of tp53_group values compared with get_statistics under each panel.
tp53_group_pairs = [
    ['1_WILD_TYPE', '0_HETLOSS'],
    ['1_WILD_TYPE', '>=1_LOSS'],
    ['>1muts', '>=1_LOSS'],
]
for metric, title_prefix, y_max in driver_count_panels:
    fig, ax = boxplot_sampletype(df=master_no_wgd_cancer,
                                 group='tp53_group',
                                 palette=palette,
                                 order=group_list,
                                 metrics=metric,
                                 figsize=(8,12),
                                 title='{} - TP53 Subgroups - No WGD'.format(title_prefix),
                                 xlim=[-0.1, y_max])
    plt.show()
    display_side_by_side(*[get_statistics(df=master_no_wgd_cancer,
                                          group='tp53_group',
                                          metrics=metric,
                                          group_list=pair)
                           for pair in tp53_group_pairs])
Here is one major piece of information:
The idea here is to see if we have differences in Fraction of Genome Altered if we cut our Cancer cohort on the number of drivers per sample.
Do we have more instability with more INDEL Driver Mutations within the same subgroup?
# 1_WILD_TYPE subgroup. An explicit copy is taken because a column is added
# below; writing onto a filtered slice triggers pandas' SettingWithCopyWarning.
master_no_wgd_cancer_wt = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '1_WILD_TYPE'].copy()
# Samples with more than `thr` INDEL driver mutations are labelled "High".
thr=1
def get_driver_groups(x):
    """Label a sample row as high/low from its ``indel_driver_mutation_count``.

    Reads the module-level threshold ``thr``. Returns ``None`` when the count
    compares as neither above nor below the threshold (e.g. a missing value).
    """
    if x.indel_driver_mutation_count > thr:
        return 'High Co-Driver Count'
    if x.indel_driver_mutation_count <= thr:
        return 'Low Co-Driver Count'
master_no_wgd_cancer_wt['co_driver_group'] = master_no_wgd_cancer_wt.apply(get_driver_groups, axis=1)
fig, ax = boxplot_sampletype(df=master_no_wgd_cancer_wt,
                             group='co_driver_group',
                             palette={'High Co-Driver Count': '#FF9900' , 'Low Co-Driver Count': '#146EB4'},
                             order=['High Co-Driver Count', 'Low Co-Driver Count'],
                             metrics='frac_genome_altered',
                             figsize=(4,10),
                             title='Fraction of Genome Altered - 1_WILD_TYPE subgroup - Co Driver Count (thr={}) - {}'.format(thr,cancer),
                             xlim=[0,1])
plt.show()
get_statistics(df=master_no_wgd_cancer_wt,
               group='co_driver_group',
               metrics='frac_genome_altered',
               group_list=['High Co-Driver Count', 'Low Co-Driver Count'])
# 0_HETLOSS subgroup. An explicit copy is taken because a column is added
# below; writing onto a filtered slice triggers pandas' SettingWithCopyWarning.
master_no_wgd_cancer_het = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '0_HETLOSS'].copy()
# Samples with more than `thr` INDEL driver mutations are labelled "High".
thr=1
def get_driver_groups(x):
    """Label a sample row as high/low from its ``indel_driver_mutation_count``.

    Reads the module-level threshold ``thr``. Returns ``None`` when the count
    compares as neither above nor below the threshold (e.g. a missing value).
    """
    if x.indel_driver_mutation_count > thr:
        return 'High Co-Driver Count'
    if x.indel_driver_mutation_count <= thr:
        return 'Low Co-Driver Count'
master_no_wgd_cancer_het['co_driver_group'] = master_no_wgd_cancer_het.apply(get_driver_groups, axis=1)
fig, ax = boxplot_sampletype(df=master_no_wgd_cancer_het,
                             group='co_driver_group',
                             palette={'High Co-Driver Count': '#FF9900' , 'Low Co-Driver Count': '#146EB4'},
                             order=['High Co-Driver Count', 'Low Co-Driver Count'],
                             metrics='frac_genome_altered',
                             figsize=(4,10),
                             title='Fraction of Genome Altered - 0_HETLOSS subgroup - Co Driver Count (thr={}) - {}'.format(thr,cancer),
                             xlim=[0,1])
plt.show()
get_statistics(df=master_no_wgd_cancer_het,
               group='co_driver_group',
               metrics='frac_genome_altered',
               group_list=['High Co-Driver Count', 'Low Co-Driver Count'])
# Top 15 co-drivers in the whole non-WGD cohort and in its TP53-mutated samples only.
codrivers_glioma = get_major_codrivers(master=master_no_wgd_cancer, maf=maf_cohort_nowgd, head=15)
codrivers_glioma_tp53 = get_major_codrivers(
    master=master_no_wgd_cancer[master_no_wgd_cancer['tp53_count'] >= 1],
    maf=maf_cohort_nowgd,
    head=15,
)
# Keep the genes present in both rankings and express the TP53-mutated count
# as a percentage of the whole-cohort count.
co_drivers = pd.merge(codrivers_glioma, codrivers_glioma_tp53, on='Hugo_Symbol')
co_drivers.columns = ['cancer', 'cancer_tp53']
co_drivers['ratio'] = co_drivers.apply(lambda row: 100*round(row.cancer_tp53/row.cancer, 4), axis=1)
co_drivers = co_drivers.sort_values(by='ratio', ascending=False)
co_drivers
# Tick labels: gene name followed by its whole-cohort count.
labels = [
    gene + ' (' + str(int(co_drivers.loc[gene]['cancer'])) + ')'
    for gene in co_drivers.index.tolist()
]
ax = sns.barplot(y=co_drivers.index, x='ratio', data=co_drivers[['ratio']], color='#7F8C8D', saturation=.2)
ax.set_yticklabels(labels)
ax.set_title('Co-Drivers Enrichment in TP53 State')
codrivers_glioma
# Top 15 driver genes of the non-WGD cohort, TP53 included this time.
codrivers_glioma = get_major_codrivers(master=master_no_wgd_cancer,
                                       maf=maf_cohort_nowgd,
                                       head=15,
                                       tp53=True)
# Share of each gene (in percent) among the counted mutations.
codrivers_glioma['proportion'] = codrivers_glioma.apply(
    lambda row: 100* round(row['count'] / codrivers_glioma.sum().values[0], 4), axis=1)
top_drivers = codrivers_glioma.head(15)
# Tick labels: gene name followed by its mutation count.
labels = [
    gene + ' (' + str(int(codrivers_glioma.loc[gene]['count'])) + ')'
    for gene in top_drivers.index.tolist()
]
ax = sns.barplot(y=top_drivers.index, x='proportion', data=top_drivers[['proportion']], color='#7F8C8D', saturation=.2)
ax.set_yticklabels(labels)
ax.set_title('Drivers Frequency in {}'.format(cancer))
# Co-driver proportions: TP53 residual vs. no TP53 residual.
co_drivers_res = create_co_drivers_table(master=master_no_wgd_cancer,
                                         group_type='tp53_res_group',
                                         group_1='tp53_res',
                                         group_2='no_tp53_res')
co_drivers_res
fig=plt.figure(figsize=(7,7))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
# proportion_1 is negative in the table, so the two groups are mirrored around 0.
co_drivers_res[['proportion_1', 'proportion_2']].head(10)[::-1].plot.barh(stacked=True, ax=ax, width=1, color = ['#2ECC71','#1E8449'])
ax.legend(['TP53 Residual', 'No TP53 Residual'], fontsize=10)
ax.set_title('Co-Drivers Proportion per TP53 State', fontsize=14)
plt.yticks(fontsize=10)
ax.set_ylabel('')
# Show magnitudes on both sides of 0. The labels are derived from the real
# ticks instead of a hand-written list that only matched one dataset.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:g}'.format(abs(value))))
ax.tick_params(axis='x', labelsize=10)
# Positional None toggles the grid; the `b=` keyword was renamed to
# `visible=` and is rejected by recent Matplotlib releases.
plt.grid(None)
plt.show()
# `subgroup` is used in the plot title below but was never defined in this
# file; it names the first group of the comparison, whose values are plotted.
subgroup = '>=1_cnLOH'
co_drivers_cnloh_loss = create_co_drivers_table(master=master_no_wgd_cancer,
                                                group_type='tp53_group',
                                                group_1=subgroup,
                                                group_2='>=1_LOSS')
co_drivers_cnloh_loss
# Tick labels: gene name followed by its count in the first group ('count_x' after the merge).
labels = [
    element + ' (' + str(int(co_drivers_cnloh_loss.loc[element]['count_x'])) + ')'
    for element in co_drivers_cnloh_loss.head(10).index.tolist()
]
# NOTE(review): proportion_1 is stored negated by create_co_drivers_table, so
# these bars point to the left - confirm this is intended for a frequency plot.
ax = sns.barplot(y=co_drivers_cnloh_loss.head(10).index, x='proportion_1',data=co_drivers_cnloh_loss.head(10)[['proportion_1']], color='#7F8C8D', saturation=.2)
ax.set_yticklabels(labels)
ax.set_title('Co-Drivers Frequency in {} - {}'.format(subgroup, cancer))
# Mirrored bar chart: >=1_cnLOH (left, negative values) vs. >=1_LOSS (right).
fig=plt.figure(figsize=(7,7))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
co_drivers_cnloh_loss[['proportion_1', 'proportion_2']].head(10)[::-1].plot.barh(stacked=True, ax=ax, width=1, color = [mc[4],mc[0]])
ax.legend(['>=1_cnLOH', '>=1_LOSS'], fontsize=5)
ax.set_title('Co-Drivers Proportion per TP53 State')
plt.yticks(fontsize=7)
ax.set_ylabel('')
# Show magnitudes on both sides of 0. The labels are derived from the real
# ticks instead of a hand-written list that only matched one dataset.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:g}'.format(abs(value))))
ax.tick_params(axis='x', labelsize=8)
# Positional None toggles the grid; the `b=` keyword was renamed to
# `visible=` and is rejected by recent Matplotlib releases.
plt.grid(None)
plt.show()
Here we see two different strong signals:
# Co-driver proportions: 0_HETLOSS vs. >=1_LOSS.
co_drivers_losses = create_co_drivers_table(master=master_no_wgd_cancer,
                                            group_type='tp53_group',
                                            group_1='0_HETLOSS',
                                            group_2='>=1_LOSS')
co_drivers_losses
fig=plt.figure(figsize=(5,5))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
# proportion_1 is negative in the table, so the two groups are mirrored around 0.
co_drivers_losses[['proportion_1', 'proportion_2']].head(10)[::-1].plot.barh(stacked=True, ax=ax, width=1, color = [mc[5],mc[0]])
ax.legend(['0_HETLOSS', '>=1_LOSS'], fontsize=5)
ax.set_title('Co-Drivers Proportion per TP53 State')
plt.yticks(fontsize=7)
ax.set_ylabel('')
# Show magnitudes on both sides of 0. The labels are derived from the real
# ticks instead of a hand-written list that only matched one dataset.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:g}'.format(abs(value))))
ax.tick_params(axis='x', labelsize=8)
# Positional None toggles the grid; the `b=` keyword was renamed to
# `visible=` and is rejected by recent Matplotlib releases.
plt.grid(None)
plt.show()
# Co-driver proportions: >1muts vs. >=1_cnLOH.
co_drivers_mult_cnloh = create_co_drivers_table(master=master_no_wgd_cancer,
                                                group_type='tp53_group',
                                                group_1='>1muts',
                                                group_2='>=1_cnLOH')
co_drivers_mult_cnloh
fig=plt.figure(figsize=(5,5))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
# proportion_1 is negative in the table, so the two groups are mirrored around 0.
co_drivers_mult_cnloh[['proportion_1', 'proportion_2']].head(10)[::-1].plot.barh(stacked=True, ax=ax, width=1, color = [mc[3],mc[4]])
ax.legend(['>1muts', '>=1_cnLOH'], fontsize=5)
ax.set_title('Co-Drivers Proportion per TP53 State')
plt.yticks(fontsize=7)
ax.set_ylabel('')
# Show magnitudes on both sides of 0. The labels are derived from the real
# ticks instead of a hand-written list that only matched one dataset.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:g}'.format(abs(value))))
ax.tick_params(axis='x', labelsize=8)
# Positional None toggles the grid; the `b=` keyword was renamed to
# `visible=` and is rejected by recent Matplotlib releases.
plt.grid(None)
plt.show()
# Co-driver proportions: 1_WILD_TYPE vs. >=1_LOSS.
co_drivers_wt_loss = create_co_drivers_table(master=master_no_wgd_cancer,
                                             group_type='tp53_group',
                                             group_1='1_WILD_TYPE',
                                             group_2='>=1_LOSS')
co_drivers_wt_loss
fig=plt.figure(figsize=(5,5))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
# proportion_1 is negative in the table, so the two groups are mirrored around 0.
co_drivers_wt_loss[['proportion_1', 'proportion_2']].head(10)[::-1].plot.barh(stacked=True, ax=ax, width=1, color = [mc[2],mc[0]])
ax.legend(['1_WILD_TYPE', '>=1_LOSS'], fontsize=5)
ax.set_title('Co-Drivers Proportion per TP53 State')
plt.yticks(fontsize=7)
ax.set_ylabel('')
# Show magnitudes on both sides of 0. The labels are derived from the real
# ticks instead of a hand-written list that only matched one dataset.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:g}'.format(abs(value))))
ax.tick_params(axis='x', labelsize=8)
# Positional None toggles the grid; the `b=` keyword was renamed to
# `visible=` and is rejected by recent Matplotlib releases.
plt.grid(None)
plt.show()
def get_major_codrivers(master: pd.DataFrame, maf: pd.DataFrame, head: int = 10, tp53=False):
    """Count driver mutations per gene among the samples of ``master``.

    This redefinition keeps the optional ``tp53`` flag of the first definition
    in this notebook, so earlier cells that pass ``tp53=True`` still work when
    they are re-run after this cell.

    Args:
        master: Sample table; its ``Tumor_Id`` values select the samples.
        maf: Mutation table with ``Tumor_Sample_Barcode``, ``driver`` and
            ``Hugo_Symbol`` columns.
        head: Number of most frequently mutated genes to return.
        tp53: When False (default) TP53 rows are excluded from the count.

    Returns:
        A DataFrame indexed by ``Hugo_Symbol`` with a single ``count`` column,
        sorted by ``count`` in descending order and truncated to ``head`` rows.
    """
    samples = master.Tumor_Id.tolist()
    # One combined mask on `maf` itself: chaining `maf[a][b][c]` indexes an
    # already-filtered frame with full-length masks and relies on pandas
    # silently re-aligning them.
    keep = maf.Tumor_Sample_Barcode.isin(samples) & (maf['driver'] == True)  # noqa: E712
    if not tp53:
        keep &= maf['Hugo_Symbol'] != 'TP53'
    counts = maf.loc[keep].groupby('Hugo_Symbol').size().to_frame('count')
    return counts.sort_values(by='count', ascending=False).head(head)
def create_co_drivers_table_wgd(master_1: pd.DataFrame, master_2: pd.DataFrame, group_type: str, group_1: str):
    """Compare the co-drivers of one non-WGD subgroup with those of WGD TP53-LOH samples.

    The first table is built from the rows of ``master_1`` whose ``group_type``
    equals ``group_1``, using the module-level ``maf_cohort_nowgd``. The second
    is built from the rows of ``master_2`` with ``tp53_count >= 1`` and
    ``tp53_loh_status == True``, using the module-level ``maf_cohort_wgd``.

    Returns:
        The inner merge of both tables on ``Hugo_Symbol`` (WGD table first).
        ``proportion_2`` is negated so that both proportions can be drawn as a
        mirrored bar chart.
    """
    def _with_proportion(table, column):
        # Percentage of each gene among the counted mutations; the total is
        # computed once instead of being re-summed for every row.
        total = table['count'].sum()
        table[column] = table['count'].apply(lambda n: 100 * round(n / total, 4))
        return table

    master_group_1 = master_1[master_1[group_type] == group_1]
    co_drivers_group_1 = _with_proportion(
        get_major_codrivers(master=master_group_1, maf=maf_cohort_nowgd, head=100),
        'proportion_1',
    )

    # Single combined mask instead of chaining two boolean selections.
    tp53_loh = (master_2['tp53_count'] >= 1) & (master_2['tp53_loh_status'] == True)  # noqa: E712
    master_group_2 = master_2[tp53_loh]
    co_drivers_group_2 = _with_proportion(
        get_major_codrivers(master=master_group_2, maf=maf_cohort_wgd, head=100),
        'proportion_2',
    )

    co_drivers_groups = pd.merge(co_drivers_group_2, co_drivers_group_1, on='Hugo_Symbol')
    co_drivers_groups['proportion_2'] = - co_drivers_groups['proportion_2']
    return co_drivers_groups
# Co-driver proportions: WGD TP53-LOH samples vs. non-WGD >=1_LOSS samples.
co_drivers_wgd_loss = create_co_drivers_table_wgd(master_1=master_no_wgd_cancer,
                                                  master_2=master_wgd_cancer,
                                                  group_type='tp53_group',
                                                  group_1='>=1_LOSS')
co_drivers_wgd_loss
fig=plt.figure(figsize=(5,5))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['top'].set_visible(False)
# proportion_2 is negative in the table, so the two groups are mirrored around 0.
co_drivers_wgd_loss[['proportion_2', 'proportion_1']].head(15)[::-1].plot.barh(stacked=True, ax=ax, width=1, color = ['#7F8C8D',mc[0]])
ax.legend(['WGD - TP53 - LOH', '>=1_LOSS'], fontsize=5)
ax.set_title('Co-Drivers Proportion per TP53 State')
plt.yticks(fontsize=7)
ax.set_ylabel('')
# Show magnitudes on both sides of 0, like the other mirrored charts of this
# notebook. The labels are derived from the real ticks instead of a
# hand-written list that only matched one dataset.
ax.xaxis.set_major_formatter(FuncFormatter(lambda value, _pos: '{:g}'.format(abs(value))))
ax.tick_params(axis='x', labelsize=8)
# Positional None toggles the grid; the `b=` keyword was renamed to
# `visible=` and is rejected by recent Matplotlib releases.
plt.grid(None)
plt.show()
def get_master_codrivers(master: pd.DataFrame, maf: pd.DataFrame, symbol: str):
    """Return the rows of ``master`` whose sample carries a mutation in ``symbol``.

    Args:
        master: Sample table with a ``Tumor_Id`` column.
        maf: Mutation table with ``Tumor_Sample_Barcode`` and ``Hugo_Symbol`` columns.
        symbol: Gene symbol to look for.

    Returns:
        An independent copy of the matching rows, so callers can add columns
        to it without writing onto a slice of ``master``.

    NOTE(review): every mutation row of ``maf`` is considered here, not only
    the ones flagged as drivers - confirm this is intended.
    """
    samples = master.Tumor_Id.tolist()
    # One combined mask on `maf` instead of chaining two boolean selections.
    carries_symbol = maf.Tumor_Sample_Barcode.isin(samples) & (maf['Hugo_Symbol'] == symbol)
    samples_final = maf.loc[carries_symbol, 'Tumor_Sample_Barcode'].tolist()
    return master[master.Tumor_Id.isin(samples_final)].copy()
# no_tp53_res subgroup and, within it, the samples mutated in IDH1, ATRX and TERT.
master_no_wgd_cancer_res = master_no_wgd_cancer[master_no_wgd_cancer['tp53_res_group'] == 'no_tp53_res']
master_IDH1 = get_master_codrivers(master=master_no_wgd_cancer_res,
                                   maf=maf_cohort_nowgd,
                                   symbol='IDH1')
master_ATRX = get_master_codrivers(master=master_no_wgd_cancer_res,
                                   maf=maf_cohort_nowgd,
                                   symbol='ATRX')
master_TERT = get_master_codrivers(master=master_no_wgd_cancer_res,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
# assign() returns labelled copies, so no column is written onto a filtered
# slice of the master table (which triggers SettingWithCopyWarning).
master_no_wgd_cancer_res = master_no_wgd_cancer_res.assign(data='no_tp53_res')
master_IDH1 = master_IDH1.assign(data='IDH1')
master_ATRX = master_ATRX.assign(data='ATRX')
master_TERT = master_TERT.assign(data='TERT')
# The whole subgroup and its gene subsets are stacked, so a sample can appear in several boxes.
masters = [master_no_wgd_cancer_res, master_IDH1, master_ATRX, master_TERT]
allMasters = pd.concat(masters)
fig=plt.figure(figsize=(5,10))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
allMasters[['frac_genome_altered', 'data']].boxplot(by="data", ax=ax)
ax.set_title('Fraction of Genome Altered - No TP53 Residual subgroup')
ax.set_xlabel('')
# >=1_cnLOH subgroup and, within it, the samples mutated in IDH1, ATRX and TERT.
master_no_wgd_cancer_cnloh = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '>=1_cnLOH']
master_IDH1 = get_master_codrivers(master=master_no_wgd_cancer_cnloh,
                                   maf=maf_cohort_nowgd,
                                   symbol='IDH1')
master_ATRX = get_master_codrivers(master=master_no_wgd_cancer_cnloh,
                                   maf=maf_cohort_nowgd,
                                   symbol='ATRX')
master_TERT = get_master_codrivers(master=master_no_wgd_cancer_cnloh,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
# assign() returns labelled copies, so no column is written onto a filtered
# slice of the master table (which triggers SettingWithCopyWarning).
master_no_wgd_cancer_cnloh = master_no_wgd_cancer_cnloh.assign(data='>=1_cnLOH')
master_IDH1 = master_IDH1.assign(data='IDH1')
master_ATRX = master_ATRX.assign(data='ATRX')
master_TERT = master_TERT.assign(data='TERT')
# The whole subgroup and its gene subsets are stacked, so a sample can appear in several boxes.
masters = [master_no_wgd_cancer_cnloh, master_IDH1, master_ATRX, master_TERT]
allMasters = pd.concat(masters)
fig=plt.figure(figsize=(5,10))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
allMasters[['frac_genome_altered', 'data']].boxplot(by="data", ax=ax)
ax.set_title('Fraction of Genome Altered - >=1_cnLOH')
ax.set_xlabel('')
# >=1_LOSS subgroup and, within it, the samples mutated in TERT, PTEN and RB1.
master_no_wgd_cancer_loss = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '>=1_LOSS']
master_TERT = get_master_codrivers(master=master_no_wgd_cancer_loss,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
master_PTEN = get_master_codrivers(master=master_no_wgd_cancer_loss,
                                   maf=maf_cohort_nowgd,
                                   symbol='PTEN')
master_RB1 = get_master_codrivers(master=master_no_wgd_cancer_loss,
                                  maf=maf_cohort_nowgd,
                                  symbol='RB1')
# assign() returns labelled copies, so no column is written onto a filtered
# slice of the master table (which triggers SettingWithCopyWarning).
master_no_wgd_cancer_loss = master_no_wgd_cancer_loss.assign(data='>=1_LOSS')
master_TERT = master_TERT.assign(data='TERT')
master_PTEN = master_PTEN.assign(data='PTEN')
master_RB1 = master_RB1.assign(data='RB1')
# The whole subgroup and its gene subsets are stacked, so a sample can appear in several boxes.
masters = [master_no_wgd_cancer_loss, master_TERT, master_PTEN, master_RB1]
allMasters = pd.concat(masters)
fig=plt.figure(figsize=(5,10))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
allMasters[['frac_genome_altered', 'data']].boxplot(by="data", ax=ax)
ax.set_title('Fraction of Genome Altered - >=1_LOSS')
ax.set_xlabel('')
# 0_HETLOSS subgroup and, within it, the samples mutated in TERT, PTEN and RB1.
# NOTE: the variable name master_no_wgd_cancer_loss is reused here for the 0_HETLOSS rows.
master_no_wgd_cancer_loss = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '0_HETLOSS']
master_TERT = get_master_codrivers(master=master_no_wgd_cancer_loss,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
master_PTEN = get_master_codrivers(master=master_no_wgd_cancer_loss,
                                   maf=maf_cohort_nowgd,
                                   symbol='PTEN')
master_RB1 = get_master_codrivers(master=master_no_wgd_cancer_loss,
                                  maf=maf_cohort_nowgd,
                                  symbol='RB1')
# assign() returns labelled copies, so no column is written onto a filtered
# slice of the master table (which triggers SettingWithCopyWarning).
master_no_wgd_cancer_loss = master_no_wgd_cancer_loss.assign(data='0_HETLOSS')
master_TERT = master_TERT.assign(data='TERT')
master_PTEN = master_PTEN.assign(data='PTEN')
master_RB1 = master_RB1.assign(data='RB1')
# The whole subgroup and its gene subsets are stacked, so a sample can appear in several boxes.
masters = [master_no_wgd_cancer_loss, master_TERT, master_PTEN, master_RB1]
allMasters = pd.concat(masters)
fig=plt.figure(figsize=(5,10))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
allMasters[['frac_genome_altered', 'data']].boxplot(by="data", ax=ax)
ax.set_title('Fraction of Genome Altered - 0_HETLOSS')
ax.set_xlabel('')
# >1muts subgroup and, within it, the samples mutated in IDH1, ATRX and TERT.
master_no_wgd_cancer_muts = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '>1muts']
master_IDH1 = get_master_codrivers(master=master_no_wgd_cancer_muts,
                                   maf=maf_cohort_nowgd,
                                   symbol='IDH1')
master_ATRX = get_master_codrivers(master=master_no_wgd_cancer_muts,
                                   maf=maf_cohort_nowgd,
                                   symbol='ATRX')
master_TERT = get_master_codrivers(master=master_no_wgd_cancer_muts,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
# assign() returns labelled copies, so no column is written onto a filtered
# slice of the master table (which triggers SettingWithCopyWarning).
master_no_wgd_cancer_muts = master_no_wgd_cancer_muts.assign(data='>1muts')
master_IDH1 = master_IDH1.assign(data='IDH1')
master_ATRX = master_ATRX.assign(data='ATRX')
master_TERT = master_TERT.assign(data='TERT')
# The whole subgroup and its gene subsets are stacked, so a sample can appear in several boxes.
masters = [master_no_wgd_cancer_muts, master_IDH1, master_ATRX, master_TERT]
allMasters = pd.concat(masters)
fig=plt.figure(figsize=(5,10))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
allMasters[['frac_genome_altered', 'data']].boxplot(by="data", ax=ax)
ax.set_title('Fraction of Genome Altered - >1muts')
ax.set_xlabel('')
# 1_WILD_TYPE subgroup and, within it, the samples mutated in EGFR, PTEN and TERT.
master_no_wgd_cancer_wt = master_no_wgd_cancer[master_no_wgd_cancer['tp53_group'] == '1_WILD_TYPE']
master_EGFR = get_master_codrivers(master=master_no_wgd_cancer_wt,
                                   maf=maf_cohort_nowgd,
                                   symbol='EGFR')
master_PTEN = get_master_codrivers(master=master_no_wgd_cancer_wt,
                                   maf=maf_cohort_nowgd,
                                   symbol='PTEN')
master_TERT = get_master_codrivers(master=master_no_wgd_cancer_wt,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
# assign() returns labelled copies, so no column is written onto a filtered
# slice of the master table (which triggers SettingWithCopyWarning).
master_no_wgd_cancer_wt = master_no_wgd_cancer_wt.assign(data='1_WT')
master_EGFR = master_EGFR.assign(data='EGFR')
master_PTEN = master_PTEN.assign(data='PTEN')
master_TERT = master_TERT.assign(data='TERT')
# The whole subgroup and its gene subsets are stacked, so a sample can appear in several boxes.
masters = [master_no_wgd_cancer_wt, master_PTEN, master_EGFR, master_TERT]
allMasters = pd.concat(masters)
fig=plt.figure(figsize=(5,10))
ax = plt.subplot2grid(shape=(2,1), loc=(0,0), colspan=1)
allMasters[['frac_genome_altered', 'data']].boxplot(by="data", ax=ax)
ax.set_title('Fraction of Genome Altered - 1_WILD_TYPE')
ax.set_xlabel('')
Same here we take only samples with exactly 1 tp53 mutation (master_hotspot).
We have to define groups for CCF to see if there are differences between those groups. To have an idea of the CCF distribution we show here the distribution coming from the cancer_panel.

We see that our tp53_ccf distribution is very high for all subgroups except for 1_WILD_TYPE.
It will be hard to cut the cohort based on the CCF. Let's try and see the size of the subcohorts:
# Samples with exactly one TP53 mutation, plus every 0_HETLOSS sample.
# An explicit copy is taken because two columns are added below; writing onto
# a filtered slice triggers pandas' SettingWithCopyWarning.
master_ccf = master_no_wgd_cancer[(master_no_wgd_cancer['tp53_count'] == 1) | (master_no_wgd_cancer['tp53_group'] == '0_HETLOSS')].copy()
# CCF bins: low <= 0.90 < medium <= 0.95 < high.
thr_ccf_1 = 0.90 ; thr_ccf_2 = 0.95
def ccf_subgroup(x):
    """Bin a sample row by ``tp53_ccf_1`` using ``thr_ccf_1`` and ``thr_ccf_2``.

    Returns 'low', 'medium' or 'high'; returns ``None`` when no comparison
    holds (e.g. a missing value).
    """
    if x.tp53_ccf_1 <= thr_ccf_1:
        return 'low'
    elif x.tp53_ccf_1 <= thr_ccf_2:
        return 'medium'
    elif x.tp53_ccf_1 > thr_ccf_2:
        return 'high'
master_ccf['ccf_group'] = master_ccf.apply(ccf_subgroup, axis=1)
get_groupby(master_ccf, 'ccf_group', 'count')
# VAF bins: low <= 0.3 < medium <= 0.4 < high.
thr_vaf_1 = 0.3 ; thr_vaf_2 = 0.4
def vaf_subgroup(x):
    """Bin a sample row by ``tp53_vaf_1`` using ``thr_vaf_1`` and ``thr_vaf_2``.

    Returns 'low', 'medium' or 'high'; returns ``None`` when no comparison
    holds (e.g. a missing value).
    """
    if x.tp53_vaf_1 <= thr_vaf_1:
        return 'low'
    elif x.tp53_vaf_1 <= thr_vaf_2:
        return 'medium'
    elif x.tp53_vaf_1 > thr_vaf_2:
        return 'high'
master_ccf['vaf_group'] = master_ccf.apply(vaf_subgroup, axis=1)
get_groupby(master_ccf, 'vaf_group', 'count')
# Reference plot: all samples of master_ccf, grouped by tp53_group.
fig, ax = boxplot_sampletype(df=master_ccf,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE','0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - {}'.format(cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_ccf,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['0_HETLOSS', '1_WILD_TYPE'])
# Low-VAF samples; 0_HETLOSS samples are always kept, whatever their vaf_group.
master_low = master_ccf[(master_ccf['vaf_group'] == 'low') | (master_ccf['tp53_group'] == '0_HETLOSS')]
fig, ax = boxplot_sampletype(df=master_low,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE','0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - VAF < {} - {}'.format(thr_vaf_1,cancer),
xlim=[0,1])
plt.show()
# NOTE(review): this is the only stratum compared against '>=1_LOSS'; the others use '0_HETLOSS' - confirm.
get_statistics(df=master_low,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['1_WILD_TYPE', '>=1_LOSS'])
# Medium-VAF samples; 0_HETLOSS samples are always kept.
master_med = master_ccf[(master_ccf['vaf_group'] == 'medium') | (master_ccf['tp53_group'] == '0_HETLOSS')]
fig, ax = boxplot_sampletype(df=master_med,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE', '0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - {} < VAF < {} - {}'.format(thr_vaf_1,thr_vaf_2,cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_med,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['1_WILD_TYPE', '0_HETLOSS'])
# High-VAF samples; 0_HETLOSS samples are always kept.
master_high = master_ccf[(master_ccf['vaf_group'] == 'high') | (master_ccf['tp53_group'] == '0_HETLOSS')]
fig, ax = boxplot_sampletype(df=master_high,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE', '0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - VAF > {} - {}'.format(thr_vaf_2,cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_high,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['1_WILD_TYPE', '0_HETLOSS'])
# Reference plot (same call as the one preceding the VAF strata): all samples of master_ccf.
fig, ax = boxplot_sampletype(df=master_ccf,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE','0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - {}'.format(cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_ccf,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['0_HETLOSS', '1_WILD_TYPE'])
# Low-CCF samples; 0_HETLOSS samples are always kept, whatever their ccf_group.
# master_low / master_med / master_high are rebound here and replace the VAF-based frames.
master_low = master_ccf[(master_ccf['ccf_group'] == 'low') | (master_ccf['tp53_group'] == '0_HETLOSS')]
fig, ax = boxplot_sampletype(df=master_low,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE','0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - CCF < {} - {}'.format(thr_ccf_1,cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_low,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['1_WILD_TYPE', '0_HETLOSS'])
# Medium-CCF samples; 0_HETLOSS samples are always kept.
master_med = master_ccf[(master_ccf['ccf_group'] == 'medium') | (master_ccf['tp53_group'] == '0_HETLOSS')]
fig, ax = boxplot_sampletype(df=master_med,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE', '0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - {} < CCF < {} - {}'.format(thr_ccf_1,thr_ccf_2,cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_med,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['1_WILD_TYPE', '0_HETLOSS'])
# High-CCF samples; 0_HETLOSS samples are always kept.
master_high = master_ccf[(master_ccf['ccf_group'] == 'high') | (master_ccf['tp53_group'] == '0_HETLOSS')]
fig, ax = boxplot_sampletype(df=master_high,
group='tp53_group',
palette=palette,
order=['1_WILD_TYPE', '0_HETLOSS', '>=1_LOSS', '>=1_cnLOH'],
metrics='frac_genome_altered',
figsize=(5,10),
title='Fraction of Genome Altered - CCF > {} - {}'.format(thr_ccf_2,cancer),
xlim=[0,1])
plt.show()
get_statistics(df=master_high,
group='tp53_group',
metrics='frac_genome_altered',
group_list=['1_WILD_TYPE', '0_HETLOSS'])
# The vaf_group / ccf_group columns are only created on master_ccf, never on
# master_hotspot, so grouping master_hotspot by them fails unless the columns
# already exist in the source table. The single-mutation samples are therefore
# taken from master_ccf: its tp53_count == 1 rows are the master_hotspot samples,
# with both binning columns attached.
master_hotspot_levels = master_ccf[master_ccf['tp53_count'] == 1]
fig, ax = boxplot_sampletype(df=master_hotspot_levels,
                             group='vaf_group',
                             palette={'low': tab10[0] , 'medium': tab10[1], 'high':tab10[2]},
                             order=['low', 'medium', 'high'],
                             metrics='frac_genome_altered',
                             figsize=(3,10),
                             title='Fraction of Genome Altered - VAF levels - {}'.format(cancer),
                             xlim=[0,1])
plt.show()
get_statistics(df=master_hotspot_levels,
               group='vaf_group',
               metrics='frac_genome_altered',
               group_list=['low', 'medium'])
fig, ax = boxplot_sampletype(df=master_hotspot_levels,
                             group='ccf_group',
                             palette={'low': tab10[0] , 'medium': tab10[1], 'high':tab10[2]},
                             order=['low', 'medium', 'high'],
                             metrics='frac_genome_altered',
                             figsize=(3,10),
                             title='Fraction of Genome Altered - CCF levels - {}'.format(cancer),
                             xlim=[0,1])
plt.show()
get_statistics(df=master_hotspot_levels,
               group='ccf_group',
               metrics='frac_genome_altered',
               group_list=['low', 'medium'])
Something weird happens with >=1_cnLOH. This subgroup has a very low Fraction of Genome Altered, but at the same time it is the subgroup with the lowest median age. This is counterintuitive... Let's try to see which part of >=1_cnLOH is driving the signal.
# Single-mutation samples of the >=1_cnLOH subgroup.
master_hotspot_cnloh = master_hotspot[master_hotspot['tp53_group'] == '>=1_cnLOH']
get_hotspot_frac(df=master_hotspot_cnloh,
                 group_type=None,
                 group=None)
# Number of samples per mutation category of the (single) TP53 mutation.
h = get_groupby(master_hotspot_cnloh,'tp53_vc_group_1', 'count').sort_values(by='count', ascending=False)
display(h)
h = h.T
# reindex keeps the plotting order of mutation_list and zero-fills categories that
# are absent from this subgroup; plain h[mutation_list] raises KeyError in that case.
h = h.reindex(columns=mutation_list, fill_value=0)
fig = plt.figure(figsize=(6,1))
ax = plt.subplot()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
h_plot = h.plot(kind = 'barh', stacked=True, yticks=[], ax=ax, colormap="Accent")
# NOTE(review): the legend labels are hard-coded and assume the order of mutation_list - confirm.
ax.legend(['In Frame', 'Truncated', 'Missense', 'Hotspot 248','Hotspot 273','Hotspot 175', 'Other Hotspot'],loc='center left', bbox_to_anchor=(1.2, 0.5), fontsize=11)
ax.set_title('Mutation Type - {} - >=1_cnLOH - No WGD'.format(cancer), weight='bold', fontsize=18)
plt.show()
In comparison with the entire Glioma cohort, we have a clear enrichment in Hotspot 273 (22 out of 29 are in >=1_cnLOH subgroup)
# Fraction of genome altered per mutation category, restricted to the >=1_cnLOH single-mutation samples.
fig, ax = boxplot_sampletype(df=master_hotspot_cnloh,
group='tp53_vc_group_1',
palette=mutation_palette,
order=mutation_list,
metrics='frac_genome_altered',
figsize=(6,10),
title='Fraction of Genome Altered - {} - >=1_cnLOH'.format(cancer),
xlim=[0,1])
plt.show()
In this section, we want to see what are the main co-drivers in >=1_cnLOH subgroup.
# Mean of the driver_gene_count column over the >=1_cnLOH single-mutation samples.
# NOTE(review): the message calls these "co-driver" genes; whether driver_gene_count
# excludes TP53 is not visible here - confirm.
h = master_hotspot_cnloh.driver_gene_count.mean()
print('Each >=1_cnLOH sample has an average of ' + str(h) + ' co-driver genes')
def get_major_codrivers(master: pd.DataFrame, maf: pd.DataFrame, head:int = 10):
    """Return the most frequently mutated non-TP53 driver genes among the samples of ``master``.

    Parameters
    ----------
    master : pd.DataFrame
        Sample table; only its ``Tumor_Id`` column is read.
    maf : pd.DataFrame
        Mutation table with ``Tumor_Sample_Barcode``, ``driver`` and
        ``Hugo_Symbol`` columns.
    head : int
        Maximum number of genes to return.

    Returns
    -------
    pd.DataFrame
        Indexed by ``Hugo_Symbol`` with a single ``count`` column holding the
        number of matching ``maf`` rows, sorted by decreasing count and
        truncated to ``head`` rows.
    """
    # Build one mask on ``maf`` itself. Chaining maf[a][b][c] would apply masks
    # indexed on the full frame to already-filtered frames, which pandas can
    # only honour by silently reindexing the mask.
    keep = (
        maf['Tumor_Sample_Barcode'].isin(master['Tumor_Id'])
        & maf['driver'].eq(True)
        & maf['Hugo_Symbol'].ne('TP53')
    )
    counts = maf.loc[keep].groupby('Hugo_Symbol').size().to_frame('count')
    return counts.sort_values(by='count', ascending=False).head(head)
# Top 10 co-driver genes among the >=1_cnLOH samples ...
get_major_codrivers(master=master_hotspot_cnloh,
maf=maf_cohort_nowgd,
head=10)
# ... and among all samples of master_hotspot, for comparison.
get_major_codrivers(master=master_hotspot,
maf=maf_cohort_nowgd,
head=10)
In comparison with the entire Glioma cohort:
# Module-level values named after the parameters of get_master_codrivers.
# NOTE(review): these look like interactive-debugging leftovers; nothing in the
# visible code reads them — confirm before removing.
symbol = 'IDH1'
master = master_hotspot_cnloh
maf=maf_cohort_nowgd
def get_master_codrivers(master: pd.DataFrame, maf: pd.DataFrame, symbol: str):
    """Return the rows of ``master`` whose sample has a ``maf`` entry for ``symbol``.

    Parameters
    ----------
    master : pd.DataFrame
        Sample table with a ``Tumor_Id`` column.
    maf : pd.DataFrame
        Mutation table with ``Tumor_Sample_Barcode`` and ``Hugo_Symbol`` columns.
        No filter on the ``driver`` flag is applied here.
    symbol : str
        Gene symbol to look for in ``maf['Hugo_Symbol']``.

    Returns
    -------
    pd.DataFrame
        The matching rows of ``master``, as an independent copy so that callers
        can add columns to it without touching (or being warned about) ``master``.
    """
    # Combine both conditions into one mask on ``maf``. Chaining maf[a][b]
    # would apply a mask built on the full frame to an already-filtered frame.
    in_cohort = maf['Tumor_Sample_Barcode'].isin(master['Tumor_Id'])
    has_symbol = maf['Hugo_Symbol'] == symbol
    carriers = maf.loc[in_cohort & has_symbol, 'Tumor_Sample_Barcode']
    return master[master['Tumor_Id'].isin(carriers)].copy()
# Subset the >=1_cnLOH cohort to the samples mutated in each co-driver gene of interest.
master_IDH1, master_ATRX, master_TERT = [
    get_master_codrivers(master=master_hotspot_cnloh, maf=maf_cohort_nowgd, symbol=_gene)
    for _gene in ('IDH1', 'ATRX', 'TERT')
]

# Median fraction of genome altered: whole subgroup first, then each co-driver subset.
master_hotspot_cnloh['frac_genome_altered'].median()
master_IDH1['frac_genome_altered'].median()
master_ATRX['frac_genome_altered'].median()
master_TERT['frac_genome_altered'].median()

# Tag every frame with its origin so the rows can be told apart once stacked.
masters = [master_hotspot_cnloh, master_IDH1, master_ATRX, master_TERT]
for _frame, _tag in zip(masters, ('>=1_cnLOH', 'IDH1', 'ATRX', 'TERT')):
    _frame['data'] = _tag
allMasters = pd.concat(masters)

# Boxplot of frac_genome_altered grouped by the 'data' tag.
plt.figure()
allMasters[['frac_genome_altered', 'data']].boxplot(by="data")
#### TP53 Residual Groups
#fig=plt.figure(figsize=(10,3))
# Boxplot of 'Patient_Current_Age' for the whole cohort, placed in the first cell of a 4x1 grid.
ax = plt.subplot2grid(shape=(4, 1), loc=(0, 0), colspan=1)
_age_axes = sns.boxplot(x='Patient_Current_Age', data=master_no_wgd_cancer, ax=ax)
_age_title = 'Patient Age - {}'.format(cancer)
_age_axes.set_title(_age_title, weight='bold', fontsize=14)
style(ax)
# Hide the right and top borders of the axes.
for _side in ('right', 'top'):
    ax.spines[_side].set_visible(False)
# 'Patient_Current_Age' grouped by 'tp53_res_group', followed by a statistics call
# on the 'tp53_res' and 'no_tp53_res' groups.
# NOTE(review): boxplot_sampletype and get_statistics are defined outside this
# chunk — confirm what they draw/compute.
fig, ax = boxplot_sampletype(df=master_no_wgd_cancer,
group='tp53_res_group',
palette=palette_res,
order=res_group_list,
metrics='Patient_Current_Age',
figsize=(3,10),
title='Patient Current Age - {}'.format(cancer),
xlim=[20,100])
plt.show()
get_statistics(df=master_no_wgd_cancer,
group='tp53_res_group',
metrics='Patient_Current_Age',
group_list=['tp53_res', 'no_tp53_res'])
#### TP53 Subgroups
# 'Patient_Current_Age' grouped by 'tp53_group', followed by a statistics call
# on the '1_WILD_TYPE' and '>=1_cnLOH' groups.
# NOTE(review): boxplot_sampletype and get_statistics are defined outside this
# chunk — confirm what they draw/compute.
fig, ax = boxplot_sampletype(df=master_no_wgd_cancer,
group='tp53_group',
palette=palette,
order=group_list,
metrics='Patient_Current_Age',
figsize=(7,10),
title='Patient Current Age - {}'.format(cancer),
xlim=[20,100])
plt.show()
get_statistics(df=master_no_wgd_cancer,
group='tp53_group',
metrics='Patient_Current_Age',
group_list=['1_WILD_TYPE', '>=1_cnLOH'])
# Number of samples per 'Sex' value in the cohort.
h = get_groupby(master_no_wgd_cancer, 'Sex', 'count').sort_values(by='count', ascending=False)
display(h)

# One row ('count') with a 'Male' and a 'Female' column, in that order.
# reindex is used instead of h[['Male', 'Female']] so that a cohort containing
# a single sex yields a zero-width segment rather than a KeyError, and the
# segments stay aligned with the positional legend labels below.
h = h.T
h = h.reindex(columns=['Male', 'Female'], fill_value=0)

fig = plt.figure(figsize=(6,1))
ax = plt.subplot()
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
h_plot = h.plot(kind = 'barh', stacked=True, yticks=[], ax=ax)
ax.legend(['Male', 'Female'],loc='center left', bbox_to_anchor=(1.1, 0.5), fontsize=11)
ax.set_title('Sex Distribution - {} - No WGD'.format(cancer), weight='bold', fontsize=18)
plt.show()
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test
# Log-rank test of overall survival between the 'tp53_res' and 'no_tp53_res' groups.
# Samples with a missing survival time or status are discarded first.
data = master_no_wgd_cancer.dropna(subset=['Overall_Survival_Months', 'Overall_Survival_Status'])
# Event indicator: 1 when the status is 'DECEASED', 0 otherwise. A vectorised
# comparison replaces the row-wise apply and also works on an empty frame.
data['Overall Survival Status 0/1'] = (data['Overall_Survival_Status'] == 'DECEASED').astype(int)
data = data[['tp53_group', 'tp53_res_group', 'Overall Survival Status 0/1', 'Overall_Survival_Months']]

ix1 = data['tp53_res_group'] == 'tp53_res'
ix2 = data['tp53_res_group'] == 'no_tp53_res'
T_exp, E_exp = data.loc[ix1, 'Overall_Survival_Months'], data.loc[ix1, 'Overall Survival Status 0/1']
T_con, E_con = data.loc[ix2, 'Overall_Survival_Months'], data.loc[ix2, 'Overall Survival Status 0/1']

results = logrank_test(T_exp, T_con, event_observed_A=E_exp, event_observed_B=E_con)
results.print_summary()
def _plot_km_by_group(master, group_col, groups, colors, title):
    """Draw one Kaplan-Meier overall-survival curve per group on a shared axis.

    Parameters
    ----------
    master : pd.DataFrame
        Sample table with ``Overall_Survival_Months``,
        ``Overall_Survival_Status`` and ``group_col`` columns.
    group_col : str
        Column whose values define the groups.
    groups : sequence
        Values of ``group_col`` to plot, in plotting order.
    colors : sequence
        Curve colors, indexed in parallel with ``groups``.
    title : str
        Figure super-title.

    Returns
    -------
    (fig, ax)
        The figure and axes that were drawn on (after ``plt.show()``).

    Notes
    -----
    A group with no row left after discarding missing survival data is
    skipped with a printed notice instead of being swallowed by a bare
    ``except``. Any other error is allowed to surface.
    """
    fig = plt.figure(figsize=(10,7))
    ax = fig.add_subplot(111)
    fig.suptitle(title, fontsize=16, weight='bold')
    kmf = KaplanMeierFitter()
    for i, group in enumerate(groups):
        data = master[master[group_col] == group].dropna(
            subset=['Overall_Survival_Months', 'Overall_Survival_Status'])
        if data.empty:
            print('No survival data for {} = {} - curve skipped'.format(group_col, group))
            continue
        # Event indicator: 1 when the status is 'DECEASED', 0 otherwise.
        events = (data['Overall_Survival_Status'] == 'DECEASED').astype(int)
        kmf.fit(np.array(data['Overall_Survival_Months']),
                event_observed=np.array(events),
                label=group)
        kmf.plot_survival_function(color=colors[i], ax=ax)
    plt.show()
    return fig, ax


# Whole cohort: by TP53 residual group, then by TP53 subgroup.
fig, ax = _plot_km_by_group(master_no_wgd_cancer, 'tp53_res_group', res_group_list, res_palette_list,
                            'Survival Analysis - {}'.format(cancer))
fig, ax = _plot_km_by_group(master_no_wgd_cancer, 'tp53_group', group_list, palette_list,
                            'Survival Analysis - {}'.format(cancer))

# Samples with an IDH1 entry in the maf.
master_IDH1 = get_master_codrivers(master=master_no_wgd_cancer,
                                   maf=maf_cohort_nowgd,
                                   symbol='IDH1')
fig, ax = _plot_km_by_group(master_IDH1, 'tp53_res_group', res_group_list, res_palette_list,
                            'Survival Analysis - {} - IDH1'.format(cancer))
fig, ax = _plot_km_by_group(master_IDH1, 'tp53_group', group_list, palette_list,
                            'Survival Analysis - {} - IDH1'.format(cancer))

# Samples with a TERT entry in the maf.
master_TERT = get_master_codrivers(master=master_no_wgd_cancer,
                                   maf=maf_cohort_nowgd,
                                   symbol='TERT')
fig, ax = _plot_km_by_group(master_TERT, 'tp53_res_group', res_group_list, res_palette_list,
                            'Survival Analysis - {} - TERT'.format(cancer))
fig, ax = _plot_km_by_group(master_TERT, 'tp53_group', group_list, palette_list,
                            'Survival Analysis - {} - TERT'.format(cancer))

# Samples with an ATRX entry in the maf.
master_ATRX = get_master_codrivers(master=master_no_wgd_cancer,
                                   maf=maf_cohort_nowgd,
                                   symbol='ATRX')
fig, ax = _plot_km_by_group(master_ATRX, 'tp53_res_group', res_group_list, res_palette_list,
                            'Survival Analysis - {} - ATRX'.format(cancer))
fig, ax = _plot_km_by_group(master_ATRX, 'tp53_group', group_list, palette_list,
                            'Survival Analysis - {} - ATRX'.format(cancer))